
* Session setup: wipe memory, make sure no log is open, apply display
* settings, then start a dated text log and load the analysis data.
clear all
capture log close		// a log left open by an earlier run would block -log using-
set more off
set trace off
set scheme sj

log using "$log/10_ex_margin_${S_DATE}", text replace

* Time stamp at the top of the log
display "$S_DATE" " " "$S_TIME"

use "$temp/analysis_pt_new", clear

* Draw a reproducible 50% random sample of persons (one row per persnr,
* fixed seed) and store the drawn ids with the flag sample = 1.
preserve
	keep persnr
	duplicates drop
	set seed 666
	sample 50
	generate byte sample = 1
	save "$temp/50p_sample_persnr", replace
restore

* Bring the flag back to the full data. Persons that were not drawn have no
* match in the sample file, so their flag is missing and is set to 0 here.
merge m:1 persnr using "$temp/50p_sample_persnr", nogenerate
replace sample = 0 if sample >= .
tabulate sample
compress

********************************************************************************
* Deflation of Nominal Variables *
********************************************************************************

* Consumer price index, West Germany, 1975-1991 (values on base year 1995).
* The list below holds one value per calendar year, starting in 1975.
generate cpi = .
label variable cpi "Consumer Price Index"

local cpi_year = 1975
foreach cpi_value in 54.5 56.8 58.9 60.5 63.0 66.4 70.6 74.3 76.7 78.6 80.2 80.1 80.3 81.3 83.6 85.8 89.0 {
	replace cpi = `cpi_value' if year == `cpi_year'
	local ++cpi_year
}

* Rescale the 1975-1991 values so that 1991 (89.0 on the old base) becomes
* 65.5, which puts them on the same 2015 = 100 scale as the series below.
* Only the years filled so far are non-missing, so only they are affected.
replace cpi = (cpi / 89.0) * 65.5		// see Statistisches Bundesamt (2019)

* Consumer price index, Germany (West and East), 1992-2019 (2015 = 100).
* One value per calendar year, starting in 1992.
local cpi_year = 1992
foreach cpi_value in 68.8 71.9 73.8 75.1 76.1 77.6 78.3 78.8 79.9 81.5 82.6 83.5 84.9 86.2 87.6 89.6 91.9 92.2 93.2 95.2 97.1 98.5 99.5 100.0 100.5 102.0 103.8 105.3 {
	replace cpi = `cpi_value' if year == `cpi_year'
	local ++cpi_year
}

* Express wages and annual earnings in 2015 prices. Each deflated variable
* is named <nominal>_defl; years without a CPI value yield missing.
foreach nominal in wage_ft wage aearnings_ft aearnings {
	generate `nominal'_defl = 100 * `nominal' / cpi
}
drop cpi


********************************************************************************
* Define Main Analysis Sample 
********************************************************************************
* Sample restrictions: keep training types 1 and 3, drop the 1997 entry
* cohort, and drop spells whose training started in January 1998.
keep if inlist(train_edu, 1, 3)
drop if year_entry == 1997
drop if year(train_start) == 1998 & month(train_start) == 1

* Drop experience year 20 and negative experience (missing values are kept,
* since neither condition is true for them).
summarize potential_experience
drop if potential_experience == 20 | potential_experience < 0

* Full-time earnings in the dominant job: spell length x deflated full-time
* wage x dominant-job indicator.
generate dj_ft_earn_def = spell_length_ft * wage_ft_defl * dom_job_ft

* Variables treated as constant within person in the rest of the script.
global stable_vars "year_entry german gender train_end_age month_entry train_edu train_occ region_entry U_internal"

* Keep only what the remaining steps use.
keep persnr year sample betnr $stable_vars aearnings_defl dj_ft_earn_def dom_job_ft spell_length_ft wage_ft_defl

* Attach establishment fixed effects. One file exists per centre year t
* (1999, 2002, ..., 2017); its firm_fe is assigned to the observations of
* years t-1 through t+1. Unmatched establishments stay missing.
generate ffe = .
forvalues t = 1999(3)2017 {
	local window_start = `t' - 1
	local window_end   = `t' + 1
	merge m:1 betnr using "$FE//firmfe_value_new_ver`t'3_bc.dta", keepusing(firm_fe) keep(master match) nogenerate
	replace ffe = firm_fe if inrange(year, `window_start', `window_end')
	drop firm_fe
}

* Same earnings measure as dj_ft_earn_def, but defined only where a firm
* fixed effect is available.
generate sc_earn_def = spell_length_ft * wage_ft_defl * dom_job_ft if ffe < .

drop betnr ffe dom_job_ft spell_length_ft wage_ft_defl

* Collapse to one row per person-year: person constants, annual earnings and
* the sample flag are averaged; the two earnings measures are summed.
collapse (mean) $stable_vars aearnings_defl sample (sum) dj_ft_earn_def sc_earn_def, by(persnr year)
compress

* Rectangularise the panel so that every person has a row for every year.
tsset persnr year 
tsfill, full

* -tsfill- leaves every variable except persnr and year missing in the rows
* it adds. Restore the person-level constants from the person's own mean.
* The sample flag is filled as well: it is constant within person, and if it
* stayed missing the added rows would survive both -drop if sample == 0-
* and -drop if sample == 1- and enter both subsample regressions.
foreach var of varlist $stable_vars sample {
	tempvar person_mean
	bysort persnr: egen `person_mean' = mean(`var') 
	replace `var' = `person_mean' if missing(`var')
	drop `person_mean'
	}

* Every person has at least one original row with a 0/1 flag, so after
* filling the flag must be 0 or 1 everywhere.
assert inlist(sample, 0, 1)
	
* Remove years before entry and rebuild potential experience for all rows,
* applying the same experience restrictions as before the collapse.
* Note that a missing year_entry also satisfies year < year_entry.
drop if year < year_entry
gen potential_experience = year - year_entry
drop if potential_experience == 20 
drop if potential_experience < 0
compress

* Employment indicator: 1 when annual earnings are observed and non-zero.
* Must be built before missing earnings are recoded to zero below.
gen byte employed = (!missing(aearnings_defl) & aearnings_defl != 0)

* Person-years without observed earnings count as zero earnings.
replace aearnings_defl = 0 if missing(aearnings_defl)
replace sc_earn_def = 0 if missing(sc_earn_def)

* Report, then drop, observations with a missing person-level constant.
* The counts are taken before any row is dropped.
foreach var of varlist $stable_vars {
	count if missing(`var')
	}
foreach var of varlist $stable_vars {
	drop if missing(`var')
	}

* dj_ft_earn_def is not an outcome of the regressions that follow. The log
* transforms formerly generated here were dropped again before saving, so
* they are no longer created.
drop dj_ft_earn_def
compress
	
save "$temp/full_with_sample", replace

* Outcomes and absorbed fixed effects shared by both subsample runs.
local def_out "employed sc_earn_def aearnings_defl"
local fix_absorb "year_entry month_entry potential_experience year german gender train_end_age train_edu train_occ region_entry"

* Two passes over the saved data. The first works on the data in memory and
* drops sample == 0; the second reloads the saved file and drops
* sample == 1. Each pass runs, per outcome: the regression on all rows, the
* regression on rows with a positive outcome, and the matching means by
* potential experience. The outcome is dropped once it has been used.
foreach excluded in 0 1 {
	if `excluded' == 1 {
		use "$temp/full_with_sample", clear
	}

	drop if sample == `excluded'
	drop sample

	foreach out of varlist `def_out' {
		reghdfe `out' i.potential_experience#c.U_internal, absorb(`fix_absorb') vce(cluster region_entry)
		reghdfe `out' i.potential_experience#c.U_internal if `out' > 0, absorb(`fix_absorb') vce(cluster region_entry)
		tabstat `out', by(potential_experience)
		tabstat `out' if `out' > 0, by(potential_experience)
		drop `out'
	}
}

* Housekeeping: delete the two intermediate files written by this script,
* close the log and empty memory.
erase "$temp/50p_sample_persnr.dta"
erase "$temp/full_with_sample.dta"

capture log close
clear
